{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "from promptify.models.nlp.openai_model import OpenAI\n",
    "from promptify.prompts.nlp.prompter import Prompter"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = OpenAI(api_key=\"\")\n",
    "nlp_prompter = Prompter(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = json.load(open(\"data/binary.json\",'r'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "10"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'text': 'Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.', 'labels': 'negative', 'score': '', 'complexity': ''} \n",
      "\n",
      "{'text': 'Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.', 'labels': 'positive', 'score': '', 'complexity': ''} \n",
      "\n",
      "{'text': 'So disappointed in wwe summerslam! I want to see john cena wins his 16th title', 'labels': 'negative', 'score': '', 'complexity': ''} \n",
      "\n"
     ]
    }
   ],
   "source": [
    "examples = []\n",
    "for sample in data[:3]:\n",
    "    print(sample,\"\\n\")\n",
    "    examples.append((sample['text'],sample['labels']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Binary Classificaion System\n",
      "You are a highly intelligent and accurate Binary Classification system. You take Passage as input and classify that as either positive or negative Category. Your output format is only [{'C':Category}] form, no other form.\n",
      "\n",
      "Examples:\n",
      "\n",
      "Input: Eight years the republicans denied obama’s picks. Breitbarters outrage is as phony as their fake president.\n",
      "Output: [{'C': 'negative' }]\n",
      "\n",
      "Input: Except he’s the most successful president in our lifetimes. He’s undone most of the damage Obummer did and set America on the right path again.\n",
      "Output: [{'C': 'positive' }]\n",
      "\n",
      "Input: So disappointed in wwe summerslam! I want to see john cena wins his 16th title\n",
      "Output: [{'C': 'negative' }]\n",
      "\n",
      "Input: Amazing customer service.\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "prompt = nlp_prompter.generate_prompt('binary_classification.jinja',\n",
    "                                      label_0=\"positive\",\n",
    "                                      label_1=\"negative\",\n",
    "                                      examples=examples,\n",
    "                                      text_input=\"Amazing customer service.\",\n",
    "                                     description=\"Binary Classificaion System\")\n",
    "print(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "output = nlp_prompter.fit('binary_classification.jinja',\n",
    "                 label_0=\"positive\",\n",
    "                 label_1=\"negative\",\n",
    "                 examples=examples,\n",
    "                 text_input=\"Amazing customer service.\",\n",
    "                 model_name=\"text-davinci-003\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'C': 'positive'}]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eval(output['text'].strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = json.load(open(\"data/multiclass.json\",'r'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'hate', 'joy', 'neutral', 'sadness', 'surprise', 'worry'}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "labels = set(sample['category'] for sample in data)\n",
    "labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You are a highly intelligent and accurate Multiclass Classification system. You take Passage as input and classify that as one of the following appropriate Categories:\n",
      "{'surprise', 'neutral', 'hate', 'joy', 'worry', 'sadness'}\n",
      "Your output format is only [{{'C': Appropriate Category from the list of provided Categories}}] form, no other form.\n",
      "\n",
      "Input: Amazing customer service.\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "prompt = nlp_prompter.generate_prompt('multiclass_classification.jinja',\n",
    "                                      labels=labels,\n",
    "                                      text_input=\"Amazing customer service.\",\n",
    "                                     )\n",
    "print(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "output = nlp_prompter.fit('multiclass_classification.jinja',\n",
    "                          labels=labels,\n",
    "                          text_input=\"Amazing customer service.\",\n",
    "                          model_name=\"text-davinci-003\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'C': 'joy'}]"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eval(output['text'].strip())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = json.load(open(\"data/paragraph.json\",'r'))['samples']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = json.load(open(\"data/optimized_multilabel.json\",'r'))['samples']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [],
   "source": [
    "examples = []\n",
    "\n",
    "for sample in data:\n",
    "    for label in labels:\n",
    "        if sample['id'] == label['id']:\n",
    "            examples.append((sample['paragraph'],json.dumps(label['data'])))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "('Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-derived CSF samples of idiopathic intracranial hypertension (IIH)',\n",
       " '[{\"main class\": \"Health\", \"1\": \"Medicine\", \"2\": \"Oncology\", \"3\": \"Metastasis\", \"4\": \"Breast cancer\", \"5\": \"Lung cancer\", \"6\": \"Cerebrospinal fluid\", \"7\": \"Tumor microenvironment\", \"8\": \"Single-cell RNA sequencing\", \"9\": \"Idiopathic intracranial hypertension\", \"branch\": \"Health\", \"group\": \"Clinical medicine\"}]')"
      ]
     },
     "execution_count": 55,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "examples[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "labels = ['Medicine','Oncology','Metastasis','Breast cancer','Lung cancer','Cerebrospinal fluid','Tumor microenvironment','Single-cell RNA sequencing','Idiopathic intracranial hypertension']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You are a highly intelligent and accurate healthcare domain multi-label classification system. You take Passage as input and classify that into 9 appropriate healthcare domain Categories from the given category list:\n",
      "['Medicine', 'Oncology', 'Metastasis', 'Breast cancer', 'Lung cancer', 'Cerebrospinal fluid', 'Tumor microenvironment', 'Single-cell RNA sequencing', 'Idiopathic intracranial hypertension'].\n",
      "Your output format is only [{'main class': Main Classification Category ,'1': 2nd level Classification Category, '2': 3rd level Classification Category,...,'branch' : Appropriate branch of the Passage ,'group': Appropriate Group of the Passage}] form, no other form.\n",
      "\n",
      "Examples:\n",
      "\n",
      "Input: Leptomeningeal metastases (LM) occur in patients with breast cancer (BC) and lung cancer (LC). The cerebrospinal fluid (CSF) tumour microenvironment (TME) of LM patients is not well defined at a single-cell level. We did an analysis based on single-cell RNA sequencing (scRNA-seq) data and four patient-derived CSF samples of idiopathic intracranial hypertension (IIH)\n",
      "Output: [[{\"main class\": \"Health\", \"1\": \"Medicine\", \"2\": \"Oncology\", \"3\": \"Metastasis\", \"4\": \"Breast cancer\", \"5\": \"Lung cancer\", \"6\": \"Cerebrospinal fluid\", \"7\": \"Tumor microenvironment\", \"8\": \"Single-cell RNA sequencing\", \"9\": \"Idiopathic intracranial hypertension\", \"branch\": \"Health\", \"group\": \"Clinical medicine\"}]]\n",
      "\n",
      "Input: The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\n",
      "Output:\n"
     ]
    }
   ],
   "source": [
    "prompt = nlp_prompter.generate_prompt('multilabel_classification.jinja',\n",
    "                                      n_output_labels=len(labels),\n",
    "                                      domain=\"healthcare\",\n",
    "                                      labels=labels,\n",
    "                                      examples=[examples[0]],\n",
    "                                      text_input=\"The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\",\n",
    "                                     )\n",
    "print(prompt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "output = nlp_prompter.fit('multilabel_classification.jinja',\n",
    "                                      n_output_labels=len(labels),\n",
    "                                      domain=\"healthcare\",\n",
    "                                      labels=labels,\n",
    "                                      examples=[examples[0]],\n",
    "                                      text_input=\"The patient is a 93-year-old female with a medical history of chronic right hip pain, osteoporosis, hypertension, depression, and chronic atrial fibrillation admitted for evaluation and management of severe nausea and vomiting and urinary tract infection\",\n",
    "                                     )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'prompt_tokens': 430,\n",
       " 'completion_tokens': 59,\n",
       " 'total_tokens': 489,\n",
       " 'text': \"\\n\\n[[{'main class': Main Classification Category ,'1': 2nd level Classification Category, '2': 3rd level Classification Category,...,'branch' : Appropriate branch of the Passage ,'group': Appropriate Group of the Passage}] form, no other form.\"}"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.6"
  },
  "vscode": {
   "interpreter": {
    "hash": "67ec57dda2d4cf896ecf03d091187d70ff0a6cd6e0feb9599db137f2d1cf3d9f"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
